Contents
Load Packages
library(DESeq2)
library(tximport)
library(biomaRt)
library(tidyverse)
library(ggplot2)
library(ggrepel)
library(readxl)
library(Rtsne)
library(pheatmap)
library(IHW)
library(Questools)
library(dplyr)
library(grid)
library(gridExtra)
library(jyluMisc)
bioMart
ensembl.mart <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
annotations <-
getBM(
attributes = c(
'hgnc_symbol',
'refseq_mrna',
'transcript_biotype'
),
mart = ensembl.mart
)
annotations <- annotations %>% filter(hgnc_symbol != "" & refseq_mrna != "")
Read Counts from Salmon
files <- file.path("../data/alignments/", list.files("../data/alignments"),"quant.sf")
sample.ids <- list.files("../data/alignments") %>% map(function(x) gsub("_sequence", "", strsplit(x, split ="lane8")[[1]][2]))
names(files) <- sample.ids
tx2gene <- annotations %>% dplyr::select(refseq_mrna, hgnc_symbol)
txi.salmon <- tximport(files, type = "salmon", tx2gene = tx2gene, ignoreTxVersion = TRUE)
DESeq Dataset
sample.table <-
data.frame(row.names = sample.ids,
fileName = list.files("../data/alignments/"))
dds <- DESeqDataSetFromTximport(txi.salmon, sample.table, design = ~1)
Annotating Samples
sample.list <- read_xlsx("../data/metadata/sampleList.xlsx", sheet = 2)
plate <- sample.list$`Plate well number`
id <- sample.list$ID
sample.name<- sample.list$`Sample Name`
cell_line <- sample.name %>% map(function(x) ifelse(length(strsplit(x, split = "_")[[1]]) == 4, paste(strsplit(x, split = "_")[[1]][c(2,3)], collapse = "_"), strsplit(x, split = "_")[[1]][2]))
cell_line <- unlist(cell_line)
treatment <- sample.name %>% map(function(x) ifelse(length(strsplit(x, split = "_")[[1]]) == 4, paste(strsplit(x, split = "_")[[1]][4], collapse = "_"), strsplit(x, split = "_")[[1]][3]))
treatment <- unlist(treatment)
sample.annotations <- data.frame(row.names = id, cell_line = as.character(cell_line), treatment = as.character(treatment), plate = plate)
# write.csv(sample.annotations, file = "../data/metadata/sample_annotations.csv", row.names = T)
colData(dds) <- cbind(colData(dds), sample.annotations[rownames(colData(dds)),])
Annotating Transcripts
gene.annotations <- annotations[match(rownames(dds), annotations$hgnc_symbol),]
rownames(gene.annotations) <- rownames(dds)
rowData(dds) <- gene.annotations
save(dds, file = "../data/RData/3RNAseq.RData")
GGM on Expression Matirix
dds <- estimateSizeFactors(dds)
dds <- dds[which(rowSums(counts(dds)) > 50),]
dds.vst <- varianceStabilizingTransformation(dds, blind = TRUE)
exprMat <- assay(dds.vst)
sds <- rowSds(exprMat)
exprMat <- exprMat[order(sds, decreasing = TRUE)[1:5000],]
g <- ggm(data.frame(exprMat), rho = 0.5)
g$network
PCA on Expression Matirix
pcRes <- prcomp(t(exprMat), scale. = FALSE, center = TRUE)
eigs <- pcRes$sdev^2
eigs <- eigs/sum(eigs)
plotTab <- data.frame(pcRes$x[,c(1,2)])
plotTab$sampleID <- rownames(plotTab)
plotTab$cellLine <- sample.annotations[plotTab$sampleID, "cell_line"]
plotTab$treatment <- sample.annotations[plotTab$sampleID, "treatment"]
ggplot(plotTab, aes(x=PC1, y = PC2, label = sampleID, color = cellLine, shape = treatment)) +
geom_point() +
geom_text_repel(aes(PC1,PC2, label = sampleID)) +
theme_bw()

t-SNE on Expression Matirix
tsneRun <- function(distMat,perplexity=10,theta=0,max_iter=5000) {
tsneRes <- Rtsne(distMat, perplexity = perplexity, theta = theta,
max_iter = max_iter, is_distance = TRUE, dims =2)
tsneRes <- tsneRes$Y
rownames(tsneRes) <- labels(distMat)
colnames(tsneRes) <- c("x","y")
tsneRes
}
distViab <- dist(t(exprMat))
plotTab <- data.frame(tsneRun(distViab, perplexity = 2, max_iter = 10000))
#plot
plotTab$sampleID <- rownames(plotTab)
plotTab$cellLine <- sample.annotations[plotTab$sampleID, "cell_line"]
plotTab$treatment <- sample.annotations[plotTab$sampleID, "treatment"]
ggplot(plotTab, aes(x=x, y = y, label = sampleID, color = cellLine, shape = treatment)) +
geom_point() +
geom_text_repel(aes(x,y, label = sampleID)) +
theme_bw()

Sample Similarities
colAnno <- data.frame(plotTab[,c("cellLine", "treatment")])
corMat <- cor(exprMat)
pheatmap(corMat, annotation_col = colAnno)

Differential Expressed Genes between Treatments
dds$id <- unlist(sample.ids)
dds$cell_line <- sample.annotations[dds$id, "cell_line"]
dds$treatment <- sample.annotations[dds$id, "treatment"]
dds$treatment <- droplevels(dds$treatment)
dds$cell_line <- droplevels(dds$cell_line)
design(dds) <- ~ cell_line
ddsCell <- DESeq(dds)
results.cell <- results(ddsCell, contrast = c("cell_line" , "Raji", "Namalwa"))
results.df <- as.data.frame(results.cell)
summary(results.df)
## baseMean log2FoldChange lfcSE
## Min. : 1.21 Min. :-36.12637 Min. :0.0645
## 1st Qu.: 10.25 1st Qu.: -0.69593 1st Qu.:0.2508
## Median : 54.29 Median : 0.09153 Median :0.7023
## Mean : 414.27 Mean : -0.08061 Mean :1.8143
## 3rd Qu.: 217.68 3rd Qu.: 0.57359 3rd Qu.:2.0698
## Max. :115330.63 Max. : 31.50970 Max. :8.1608
##
## stat pvalue padj
## Min. :-27.46395 Min. :0.00000 Min. :0.0000
## 1st Qu.: -1.16058 1st Qu.:0.01613 1st Qu.:0.0252
## Median : 0.06312 Median :0.28527 Median :0.3964
## Mean : -0.02558 Mean :0.37789 Mean :0.4437
## 3rd Qu.: 0.97044 3rd Qu.:0.72498 3rd Qu.:0.8615
## Max. : 26.32492 Max. :0.99987 Max. :0.9998
## NA's :1507
res.05 <- results(ddsCell, alpha = 0.05)
table(res.05$padj < 0.05)
##
## FALSE TRUE
## 6945 3729
resIHW <- results(ddsCell, filterFun=ihw)
summary(resIHW)
##
## out of 11105 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 2127, 19%
## LFC < 0 (down) : 2031, 18%
## outliers [1] : 0, 0%
## [1] see 'cooksCutoff' argument of ?results
## see metadata(res)$ihwResult on hypothesis weighting
results.cell <- list()
design(dds) <- ~treatment
print("******")
## [1] "******"
dds.cells <- dds$cell_line %>% map(function(x) dds[,dds$cell_line == x]) %>% map(function(x) x[apply(counts(x), 1, function(x) all(x > 10)),])
results.cell <- dds.cells %>% map(function(x) DESeq(x)) %>% map(function(x) results(x, contrast = c("treatment", "sgTP53", "NT")))
names(results.cell) <- dds$cell_line
P-Value Distributions for Treatment
plotList <- list()
results.cell %>% map(function(x) tibble(pval = x$pvalue)) %>% map(function(x) ggplot(x, aes(pval)) + geom_histogram(bins = 20, fill = "blue", col="blue", alpha=0.3) +
theme_bw() + theme(plot.title = element_text(hjust =0.5)) +
xlab("raw p values")) -> plots
plotList<- 1:length(plots) %>% map(function(x) plots[[x]] + ggtitle(sprintf("sgTP53 vs NT in %s", names(results.cell)[x])))
names(plotList) <- names(results.cell)
plotList
## $BL60

##
## $Ramos

##
## $Ramos

##
## $Namalwa

##
## $Namalwa

##
## $Raji

##
## $Raji

##
## $BJAB

##
## $BJAB

##
## $HBL2

##
## $Namalwa_Old

##
## $HBL2

##
## $Maver1

##
## $Maver1

##
## $THP1

##
## $THP1

##
## $RPMI8226

##
## $RPMI8226

##
## $LY47

##
## $LY47

##
## $Seraphine

##
## $Namalwa_Old

##
## $Seraphine

##
## $Raji_Old

##
## $Raji_Old

##
## $Ramos_Old

##
## $Ramos_Old

##
## $BL60_Old

##
## $BL60_Old

##
## $BL60

Enrichment Analysis for Different Treatments
# createInput <- function(DEres, pCut = 0.05, ifFDR = FALSE, rankBy = "stat") {
# if (ifFDR) {
# inputTab <- data.frame(DEres) %>% rownames_to_column(var = "symbol") %>% filter(padj <= pCut)
# } else {
# inputTab <- data.frame(DEres) %>% rownames_to_column(var = "symbol") %>%filter(pvalue <= pCut)
# }
#
# inputTab <- arrange(inputTab, pvalue) %>% filter(!duplicated(symbol)) %>% select_("symbol", rankBy) %>% data.frame(stringsAsFactors = FALSE)
# rownames(inputTab) <- inputTab$symbol
# inputTab$symbol <- NULL
# colnames(inputTab) <- "stat"
# return(inputTab)
# }
#
# gmts <- list(H=system.file("extdata","h.all.v5.1.symbols.gmt",package="BloodCancerMultiOmics2017"),
# C6=system.file("extdata","c6.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
# KEGG=system.file("extdata","c2.cp.kegg.v5.1.symbols.gmt", package = "BloodCancerMultiOmics2017"))
#
#
# results.cell %>% map(function(x) createInput(x, pCut = 0.1, ifFDR = FALSE)) %>% map(function(x) names(gmts) %>% map(function(y) runGSEA(inputTab =x, gmtFile = gmts[[y]], GSAmethod = "page"))) -> p
#
# p %>% map(function(x) x[[1]]) -> res.Halmark
# p %>% map(function(x) x[[2]]) -> res.Oncogenetic
# p %>% map(function(x) x[[3]]) -> res.KEGG
# res.Halmark
# res.Oncogenetic
# res.KEGG
# enBar.Halmark <- plotEnrichmentBar(res.Halmark, pCut = 0.05, ifFDR = FALSE, setName = "HALLMARK")
# plot(enBar.Halmark[1:20])
# enBar.Oncogenetic <- plotEnrichmentBar(res.Oncogenetic, pCut = 0.05, ifFDR = FALSE, setName = "Oncogenetic")
# plot(enBar.Oncogenetic[1:20])
# enBar.KEGG <- plotEnrichmentBar(res.KEGG, pCut = 0.05, ifFDR = FALSE, setName = "KEGG")
# plot(enBar.KEGG[1:20])
P-Value Distributions for all Cell Lines
design(dds) <- ~ cell_line + treatment
dds <- DESeq(dds)
res.all <- results(dds, contrast = c("treatment", "sgTP53", "NT"))
plotTab <- tibble(pval = res.all$pvalue)
p <- ggplot(plotTab, aes(pval)) + geom_histogram(bins = 20, fill = "blue", col="blue", alpha=0.3) +
theme_bw() + theme(plot.title = element_text(hjust =0.5)) +
ggtitle("sgTP53 vs NT in all cell lines") +
xlab("raw p values")
p

Enrichment Analysis for Different Treatments for all Cell Lines
# inputTab <- createInput(res.all, pCut = 0.1, ifFDR = TRUE)
# enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$H, GSAmethod = "page")
# enRes$Name <- gsub("HALLMARK_","", enRes$Name)
# enRes <- list(enRes)
# names(enRes) <- "sgTP53"
# enBar <- plotEnrichmentBar(enRes, pCut = 0.1, ifFDR = FALSE, setName = "HALLMARK")
# plot(enBar)
# inputTab <- createInput(res.all, pCut = 0.1, ifFDR = TRUE)
# enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$C6, GSAmethod = "page")
# enRes$Name <- gsub("Oncogenetic_","", enRes$Name)
# enRes <- list(enRes)
# names(enRes) <- "sgTP53"
# enBar <- plotEnrichmentBar(enRes, pCut = 0.11, ifFDR = FALSE, setName = "Oncogenetic")
# plot(enBar)
# inputTab <- createInput(res.all, pCut = 0.1, ifFDR = TRUE)
# enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$C6, GSAmethod = "page")
# enRes$Name <- gsub("KEGG_","", enRes$Name)
# enRes <- list(enRes)
# names(enRes) <- "sgTP53"
# enBar <- plotEnrichmentBar(enRes, pCut = .1, ifFDR = FALSE, setName = "KEGG")
# plot(enBar)